AMD_IOMMU_DEBUG("Warning: ComWaitInt bit did not assert!\n");
}
-static void clear_iommu_l1e_present(u64 l2e, unsigned long gfn)
+/* Given pfn and page table level, return pde index */
+static unsigned int pfn_to_pde_idx(unsigned long pfn, unsigned int level)
{
- u32 *l1e;
- int offset;
- void *l1_table;
+ unsigned int idx;
- l1_table = map_domain_page(l2e >> PAGE_SHIFT);
-
- offset = gfn & (~PTE_PER_TABLE_MASK);
- l1e = (u32*)(l1_table + (offset * IOMMU_PAGE_TABLE_ENTRY_SIZE));
+ idx = pfn >> (PTE_PER_TABLE_SHIFT * (--level));
+ idx &= ~PTE_PER_TABLE_MASK;
+ return idx;
+}
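+/* A worked example (a sketch, assuming PTE_PER_TABLE_SHIFT == 9, i.e. 512
+ * entries per table): pfn_to_pde_idx(0x12345, 2) drops the 9 low bits that
+ * index level 1 and masks the next 9, giving (0x12345 >> 9) & 0x1ff == 0x91,
+ * the index of the level 2 pde covering this pfn.
+ */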
- /* clear l1 entry */
- l1e[0] = l1e[1] = 0;
+static void clear_iommu_pte_present(unsigned long l1_mfn, unsigned long gfn)
+{
+ u64 *table, *pte;
- unmap_domain_page(l1_table);
+ table = map_domain_page(l1_mfn);
+ pte = table + pfn_to_pde_idx(gfn, IOMMU_PAGING_MODE_LEVEL_1);
+ *pte = 0;
+ unmap_domain_page(table);
}
-static int set_iommu_l1e_present(u64 l2e, unsigned long gfn,
- u64 maddr, int iw, int ir)
+static bool_t set_iommu_pde_present(u32 *pde, unsigned long next_mfn,
+ unsigned int next_level,
+ bool_t iw, bool_t ir)
{
- u64 addr_lo, addr_hi, maddr_old;
+ u64 addr_lo, addr_hi, maddr_old, maddr_next;
u32 entry;
- void *l1_table;
- int offset;
- u32 *l1e;
- int need_flush = 0;
-
- l1_table = map_domain_page(l2e >> PAGE_SHIFT);
+ bool_t need_flush = 0;
- offset = gfn & (~PTE_PER_TABLE_MASK);
- l1e = (u32*)((u8*)l1_table + (offset * IOMMU_PAGE_TABLE_ENTRY_SIZE));
+ maddr_next = (u64)next_mfn << PAGE_SHIFT;
- addr_hi = get_field_from_reg_u32(l1e[1],
+ addr_hi = get_field_from_reg_u32(pde[1],
IOMMU_PTE_ADDR_HIGH_MASK,
IOMMU_PTE_ADDR_HIGH_SHIFT);
- addr_lo = get_field_from_reg_u32(l1e[0],
+ addr_lo = get_field_from_reg_u32(pde[0],
IOMMU_PTE_ADDR_LOW_MASK,
IOMMU_PTE_ADDR_LOW_SHIFT);
- maddr_old = ((addr_hi << 32) | addr_lo) << PAGE_SHIFT;
+ maddr_old = (addr_hi << 32) | (addr_lo << PAGE_SHIFT);
- if ( maddr_old && (maddr_old != maddr) )
+ if ( maddr_old != maddr_next )
need_flush = 1;
- addr_lo = maddr & DMA_32BIT_MASK;
- addr_hi = maddr >> 32;
-
- set_field_in_reg_u32((u32)addr_hi, 0,
- IOMMU_PTE_ADDR_HIGH_MASK,
- IOMMU_PTE_ADDR_HIGH_SHIFT, &entry);
- set_field_in_reg_u32(iw ? IOMMU_CONTROL_ENABLED :
- IOMMU_CONTROL_DISABLED, entry,
- IOMMU_PTE_IO_WRITE_PERMISSION_MASK,
- IOMMU_PTE_IO_WRITE_PERMISSION_SHIFT, &entry);
- set_field_in_reg_u32(ir ? IOMMU_CONTROL_ENABLED :
- IOMMU_CONTROL_DISABLED, entry,
- IOMMU_PTE_IO_READ_PERMISSION_MASK,
- IOMMU_PTE_IO_READ_PERMISSION_SHIFT, &entry);
- l1e[1] = entry;
-
- set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, 0,
- IOMMU_PTE_ADDR_LOW_MASK,
- IOMMU_PTE_ADDR_LOW_SHIFT, &entry);
- set_field_in_reg_u32(IOMMU_PAGING_MODE_LEVEL_0, entry,
- IOMMU_PTE_NEXT_LEVEL_MASK,
- IOMMU_PTE_NEXT_LEVEL_SHIFT, &entry);
- set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
- IOMMU_PTE_PRESENT_MASK,
- IOMMU_PTE_PRESENT_SHIFT, &entry);
- l1e[0] = entry;
-
- unmap_domain_page(l1_table);
- return need_flush;
-}
-
-static void amd_iommu_set_page_directory_entry(u32 *pde,
- u64 next_ptr, u8 next_level)
-{
- u64 addr_lo, addr_hi;
- u32 entry;
-
- addr_lo = next_ptr & DMA_32BIT_MASK;
- addr_hi = next_ptr >> 32;
+ addr_lo = maddr_next & DMA_32BIT_MASK;
+ addr_hi = maddr_next >> 32;
/* enable read/write permissions,which will be enforced at the PTE */
set_field_in_reg_u32((u32)addr_hi, 0,
IOMMU_PDE_ADDR_HIGH_MASK,
IOMMU_PDE_ADDR_HIGH_SHIFT, &entry);
- set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
+ set_field_in_reg_u32(iw, entry,
IOMMU_PDE_IO_WRITE_PERMISSION_MASK,
IOMMU_PDE_IO_WRITE_PERMISSION_SHIFT, &entry);
- set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
+ set_field_in_reg_u32(ir, entry,
IOMMU_PDE_IO_READ_PERMISSION_MASK,
IOMMU_PDE_IO_READ_PERMISSION_SHIFT, &entry);
pde[1] = entry;
IOMMU_PDE_PRESENT_MASK,
IOMMU_PDE_PRESENT_SHIFT, &entry);
pde[0] = entry;
+
+ return need_flush;
+}
+
+static bool_t set_iommu_pte_present(unsigned long pt_mfn, unsigned long gfn,
+ unsigned long next_mfn, int pde_level,
+ bool_t iw, bool_t ir)
+{
+ u64 *table;
+ u32 *pde;
+ bool_t need_flush = 0;
+
+ table = map_domain_page(pt_mfn);
+
+ pde = (u32*)(table + pfn_to_pde_idx(gfn, pde_level));
+
+ need_flush = set_iommu_pde_present(pde, next_mfn,
+ IOMMU_PAGING_MODE_LEVEL_0, iw, ir);
+ unmap_domain_page(table);
+ return need_flush;
}
void amd_iommu_set_root_page_table(
return ptr;
}
+static unsigned int iommu_next_level(u32 *entry)
+{
+ return get_field_from_reg_u32(entry[0],
+ IOMMU_PDE_NEXT_LEVEL_MASK,
+ IOMMU_PDE_NEXT_LEVEL_SHIFT);
+}
+
static int amd_iommu_is_pte_present(u32 *entry)
{
- return (get_field_from_reg_u32(entry[0],
- IOMMU_PDE_PRESENT_MASK,
- IOMMU_PDE_PRESENT_SHIFT));
+ return get_field_from_reg_u32(entry[0],
+ IOMMU_PDE_PRESENT_MASK,
+ IOMMU_PDE_PRESENT_SHIFT);
}
void invalidate_dev_table_entry(struct amd_iommu *iommu,
send_iommu_command(iommu, cmd);
}
-static u64 iommu_l2e_from_pfn(struct page_info *table, int level,
- unsigned long io_pfn)
+/* For each pde, we use ignored bits (bit 1 - bit 8 and bit 63)
+ * to save the pde count; a pde count of 511 makes the pde a candidate
+ * for page coalescing.
+ */
+static unsigned int get_pde_count(u64 pde)
+{
+ unsigned int count;
+ u64 upper_mask = 1ULL << 63;
+ u64 lower_mask = 0xFF << 1;
+
+ count = ((pde & upper_mask) >> 55) | ((pde & lower_mask) >> 1);
+ return count;
+}
+
+/* Encode the pde count into the iommu pte ignored bits */
+static void set_pde_count(u64 *pde, unsigned int count)
{
- unsigned long offset;
- void *pde = NULL;
- void *table_vaddr;
- u64 next_table_maddr = 0;
- unsigned int lowest = 1;
+ u64 upper_mask = 1ULL << 8;
+ u64 lower_mask = 0xFF;
+ u64 pte_mask = (~(1ULL << 63)) & (~(0xFF << 1));
+
+ *pde &= pte_mask;
+ *pde |= ((count & upper_mask) << 55) | ((count & lower_mask) << 1);
+}
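+/* Round-trip sketch of the encoding above: storing a count of 511 (0x1ff)
+ * sets pde bits 8:1 to 0xff and bit 63 to 1, so a later get_pde_count()
+ * returns (1 << 8) | 0xff == PTE_PER_TABLE_SIZE - 1 and the pde becomes a
+ * merge candidate.
+ */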
+
+/* Return 1 if pages are suitable for merging at merge_level;
+ * otherwise increase the pde count if mfn is contiguous with mfn - 1.
+ */
+static int iommu_update_pde_count(struct domain *d, unsigned long pt_mfn,
+ unsigned long gfn, unsigned long mfn,
+ unsigned int merge_level)
+{
+ unsigned int pde_count, next_level;
+ unsigned long first_mfn;
+ u64 *table, *pde, *ntable;
+ u64 ntable_maddr, mask;
+ struct hvm_iommu *hd = domain_hvm_iommu(d);
+ bool_t ok = 0;
+
+ ASSERT( spin_is_locked(&hd->mapping_lock) && pt_mfn );
- BUG_ON( table == NULL || level < lowest );
+ next_level = merge_level - 1;
- if ( level == lowest )
- return page_to_maddr(table);
+ /* get pde at merge level */
+ table = map_domain_page(pt_mfn);
+ pde = table + pfn_to_pde_idx(gfn, merge_level);
- while ( level > lowest )
+ /* get page table of next level */
+ ntable_maddr = amd_iommu_get_next_table_from_pte((u32*)pde);
+ ntable = map_domain_page(ntable_maddr >> PAGE_SHIFT);
+
+ /* get the first mfn of next level */
+ first_mfn = amd_iommu_get_next_table_from_pte((u32*)ntable) >> PAGE_SHIFT;
+
+ if ( first_mfn == 0 )
+ goto out;
+
+ mask = (1ULL << (PTE_PER_TABLE_SHIFT * next_level)) - 1;
+
+ if ( ((first_mfn & mask) == 0) &&
+ (((gfn & mask) | first_mfn) == mfn) )
{
- offset = io_pfn >> ((PTE_PER_TABLE_SHIFT *
- (level - IOMMU_PAGING_MODE_LEVEL_1)));
- offset &= ~PTE_PER_TABLE_MASK;
+ pde_count = get_pde_count(*pde);
+
+ if ( pde_count == (PTE_PER_TABLE_SIZE - 1) )
+ ok = 1;
+ else if ( pde_count < (PTE_PER_TABLE_SIZE - 1) )
+ {
+ pde_count++;
+ set_pde_count(pde, pde_count);
+ }
+ }
+ else
+ /* non-contiguous mapping */
+ set_pde_count(pde, 0);
+
+out:
+ unmap_domain_page(ntable);
+ unmap_domain_page(table);
+
+ return ok;
+}
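+/* A sketch of the contiguity test above (illustrative values, assuming
+ * merge_level == 2 so that mask == 0x1ff): with a level 1 table whose first
+ * pte maps first_mfn == 0x1000 (512-aligned), mapping gfn 0x2a3 only counts
+ * as contiguous when mfn == ((0x2a3 & 0x1ff) | 0x1000) == 0x10a3.
+ */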
+
+static int iommu_merge_pages(struct domain *d, unsigned long pt_mfn,
+ unsigned long gfn, unsigned int flags,
+ unsigned int merge_level)
+{
+ u64 *table, *pde, *ntable;
+ u64 ntable_mfn;
+ unsigned long first_mfn;
+ struct hvm_iommu *hd = domain_hvm_iommu(d);
- table_vaddr = __map_domain_page(table);
- pde = table_vaddr + (offset * IOMMU_PAGE_TABLE_ENTRY_SIZE);
- next_table_maddr = amd_iommu_get_next_table_from_pte(pde);
+ ASSERT( spin_is_locked(&hd->mapping_lock) && pt_mfn );
- if ( !amd_iommu_is_pte_present(pde) )
+ table = map_domain_page(pt_mfn);
+ pde = table + pfn_to_pde_idx(gfn, merge_level);
+
+ /* get first mfn */
+ ntable_mfn = amd_iommu_get_next_table_from_pte((u32*)pde) >> PAGE_SHIFT;
+
+ if ( ntable_mfn == 0 )
+ {
+ unmap_domain_page(table);
+ return 1;
+ }
+
+ ntable = map_domain_page(ntable_mfn);
+ first_mfn = amd_iommu_get_next_table_from_pte((u32*)ntable) >> PAGE_SHIFT;
+
+ if ( first_mfn == 0 )
+ {
+ unmap_domain_page(ntable);
+ unmap_domain_page(table);
+ return 1;
+ }
+
+ /* set up super page mapping; next level = 0 marks the pde as the final translation */
+ set_iommu_pde_present((u32*)pde, first_mfn,
+ IOMMU_PAGING_MODE_LEVEL_0,
+ !!(flags & IOMMUF_writable),
+ !!(flags & IOMMUF_readable));
+
+ amd_iommu_flush_all_pages(d);
+
+ unmap_domain_page(ntable);
+ unmap_domain_page(table);
+ return 0;
+}
+
+/* Walk io page tables and build lower level page tables if necessary.
+ * {Re, un}mapping super page frames causes re-allocation of io
+ * page tables.
+ */
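+/* On success pt_mfn[level] holds the mfn of the page table at each level
+ * visited, from hd->paging_mode down to 1; callers use pt_mfn[1] for the
+ * 4K pte and the higher entries when checking for page merging.
+ */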
+static int iommu_pde_from_gfn(struct domain *d, unsigned long pfn,
+ unsigned long pt_mfn[])
+{
+ u64 *pde, *next_table_vaddr;
+ unsigned long next_table_mfn;
+ unsigned int level;
+ struct page_info *table;
+ struct hvm_iommu *hd = domain_hvm_iommu(d);
+
+ table = hd->root_table;
+ level = hd->paging_mode;
+
+ BUG_ON( table == NULL || level < IOMMU_PAGING_MODE_LEVEL_1 ||
+ level > IOMMU_PAGING_MODE_LEVEL_6 );
+
+ next_table_mfn = page_to_mfn(table);
+
+ if ( level == IOMMU_PAGING_MODE_LEVEL_1 )
+ {
+ pt_mfn[level] = next_table_mfn;
+ return 0;
+ }
+
+ while ( level > IOMMU_PAGING_MODE_LEVEL_1 )
+ {
+ unsigned int next_level = level - 1;
+ pt_mfn[level] = next_table_mfn;
+
+ next_table_vaddr = map_domain_page(next_table_mfn);
+ pde = next_table_vaddr + pfn_to_pde_idx(pfn, level);
+
+ /* Here might be a super page frame */
+ next_table_mfn = amd_iommu_get_next_table_from_pte((uint32_t*)pde)
+ >> PAGE_SHIFT;
+
+ /* Split the super page frame into smaller mappings. */
+ if ( amd_iommu_is_pte_present((u32*)pde) &&
+ (iommu_next_level((u32*)pde) == 0) &&
+ next_table_mfn != 0 )
{
- if ( next_table_maddr == 0 )
+ int i;
+ unsigned long mfn, gfn;
+ unsigned int page_sz;
+
+ page_sz = 1 << (PTE_PER_TABLE_SHIFT * (next_level - 1));
+ gfn = pfn & ~((1 << (PTE_PER_TABLE_SHIFT * next_level)) - 1);
+ mfn = next_table_mfn;
+
+ /* allocate lower level page table */
+ table = alloc_amd_iommu_pgtable();
+ if ( table == NULL )
+ {
+ AMD_IOMMU_DEBUG("Cannot allocate I/O page table\n");
+ unmap_domain_page(next_table_vaddr);
+ return 1;
+ }
+
+ next_table_mfn = page_to_mfn(table);
+ set_iommu_pde_present((u32*)pde, next_table_mfn, next_level,
+ !!IOMMUF_writable, !!IOMMUF_readable);
+
+ for ( i = 0; i < PTE_PER_TABLE_SIZE; i++ )
+ {
+ set_iommu_pte_present(next_table_mfn, gfn, mfn, next_level,
+ !!IOMMUF_writable, !!IOMMUF_readable);
+ mfn += page_sz;
+ gfn += page_sz;
+ }
+
+ amd_iommu_flush_all_pages(d);
+ }
+
+ /* Install lower level page table for non-present entries */
+ else if ( !amd_iommu_is_pte_present((u32*)pde) )
+ {
+ if ( next_table_mfn == 0 )
{
table = alloc_amd_iommu_pgtable();
if ( table == NULL )
{
- printk("AMD-Vi: Cannot allocate I/O page table\n");
- return 0;
+ AMD_IOMMU_DEBUG("Cannot allocate I/O page table\n");
+ unmap_domain_page(next_table_vaddr);
+ return 1;
}
- next_table_maddr = page_to_maddr(table);
- amd_iommu_set_page_directory_entry(
- (u32 *)pde, next_table_maddr, level - 1);
+ next_table_mfn = page_to_mfn(table);
+ set_iommu_pde_present((u32*)pde, next_table_mfn, next_level,
+ !!IOMMUF_writable, !!IOMMUF_readable);
}
else /* should never reach here */
- return 0;
+ {
+ unmap_domain_page(next_table_vaddr);
+ return 1;
+ }
}
- unmap_domain_page(table_vaddr);
- table = maddr_to_page(next_table_maddr);
+ unmap_domain_page(next_table_vaddr);
level--;
}
- return next_table_maddr;
+ /* mfn of level 1 page table */
+ pt_mfn[level] = next_table_mfn;
+ return 0;
}
static int update_paging_mode(struct domain *d, unsigned long gfn)
struct page_info *new_root = NULL;
struct page_info *old_root = NULL;
void *new_root_vaddr;
- u64 old_root_maddr;
+ unsigned long old_root_mfn;
struct hvm_iommu *hd = domain_hvm_iommu(d);
level = hd->paging_mode;
}
new_root_vaddr = __map_domain_page(new_root);
- old_root_maddr = page_to_maddr(old_root);
- amd_iommu_set_page_directory_entry((u32 *)new_root_vaddr,
- old_root_maddr, level);
+ old_root_mfn = page_to_mfn(old_root);
+ set_iommu_pde_present(new_root_vaddr, old_root_mfn, level,
+ !!IOMMUF_writable, !!IOMMUF_readable);
level++;
old_root = new_root;
offset >>= PTE_PER_TABLE_SHIFT;
+ unmap_domain_page(new_root_vaddr);
}
if ( new_root != NULL )
int amd_iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn,
unsigned int flags)
{
- u64 iommu_l2e;
- int need_flush = 0;
+ bool_t need_flush = 0;
struct hvm_iommu *hd = domain_hvm_iommu(d);
+ unsigned long pt_mfn[7];
+ unsigned int merge_level;
BUG_ON( !hd->root_table );
if ( iommu_hap_pt_share && is_hvm_domain(d) )
return 0;
+ memset(pt_mfn, 0, sizeof(pt_mfn));
+
spin_lock(&hd->mapping_lock);
/* Since HVM domain is initialized with 2 level IO page table,
{
if ( update_paging_mode(d, gfn) )
{
+ spin_unlock(&hd->mapping_lock);
AMD_IOMMU_DEBUG("Update page mode failed gfn = %lx\n", gfn);
domain_crash(d);
return -EFAULT;
}
}
- iommu_l2e = iommu_l2e_from_pfn(hd->root_table, hd->paging_mode, gfn);
- if ( iommu_l2e == 0 )
+ if ( iommu_pde_from_gfn(d, gfn, pt_mfn) || (pt_mfn[1] == 0) )
{
spin_unlock(&hd->mapping_lock);
AMD_IOMMU_DEBUG("Invalid IO pagetable entry gfn = %lx\n", gfn);
return -EFAULT;
}
- need_flush = set_iommu_l1e_present(iommu_l2e, gfn, (u64)mfn << PAGE_SHIFT,
+ /* Install 4k mapping first */
+ need_flush = set_iommu_pte_present(pt_mfn[1], gfn, mfn,
+ IOMMU_PAGING_MODE_LEVEL_1,
!!(flags & IOMMUF_writable),
!!(flags & IOMMUF_readable));
- if ( need_flush )
- amd_iommu_flush_pages(d, gfn, 0);
+ /* Do not increase the pde count if the io mapping has not changed */
+ if ( !need_flush )
+ goto out;
+
+ amd_iommu_flush_pages(d, gfn, 0);
+
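+ /* Try to coalesce: each contiguous 4K mapping bumps the count stored in the
+ * covering pde's ignored bits; once the count reaches PTE_PER_TABLE_SIZE - 1
+ * and the current mapping is contiguous as well, the lower level table is
+ * freed and the covering pde is rewritten as a super page (next level = 0). */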
+ for ( merge_level = IOMMU_PAGING_MODE_LEVEL_2;
+ merge_level <= hd->paging_mode; merge_level++ )
+ {
+ if ( pt_mfn[merge_level] == 0 )
+ break;
+ if ( !iommu_update_pde_count(d, pt_mfn[merge_level],
+ gfn, mfn, merge_level) )
+ break;
+ /* Deallocate lower level page table */
+ free_amd_iommu_pgtable(mfn_to_page(pt_mfn[merge_level - 1]));
+
+ if ( iommu_merge_pages(d, pt_mfn[merge_level], gfn,
+ flags, merge_level) )
+ {
+ spin_unlock(&hd->mapping_lock);
+ AMD_IOMMU_DEBUG("Merge iommu page failed at level %d, "
+ "gfn = %lx mfn = %lx\n", merge_level, gfn, mfn);
+ domain_crash(d);
+ return -EFAULT;
+ }
+ }
+
+out:
spin_unlock(&hd->mapping_lock);
return 0;
}
int amd_iommu_unmap_page(struct domain *d, unsigned long gfn)
{
- u64 iommu_l2e;
+ unsigned long pt_mfn[7];
struct hvm_iommu *hd = domain_hvm_iommu(d);
BUG_ON( !hd->root_table );
if ( iommu_hap_pt_share && is_hvm_domain(d) )
return 0;
+ memset(pt_mfn, 0, sizeof(pt_mfn));
+
spin_lock(&hd->mapping_lock);
/* Since HVM domain is initialized with 2 level IO page table,
{
if ( update_paging_mode(d, gfn) )
{
+ spin_unlock(&hd->mapping_lock);
AMD_IOMMU_DEBUG("Update page mode failed gfn = %lx\n", gfn);
domain_crash(d);
return -EFAULT;
}
}
- iommu_l2e = iommu_l2e_from_pfn(hd->root_table, hd->paging_mode, gfn);
-
- if ( iommu_l2e == 0 )
+ if ( iommu_pde_from_gfn(d, gfn, pt_mfn) || (pt_mfn[1] == 0) )
{
spin_unlock(&hd->mapping_lock);
AMD_IOMMU_DEBUG("Invalid IO pagetable entry gfn = %lx\n", gfn);
}
/* mark PTE as 'page not present' */
- clear_iommu_l1e_present(iommu_l2e, gfn);
+ clear_iommu_pte_present(pt_mfn[1], gfn);
spin_unlock(&hd->mapping_lock);
amd_iommu_flush_pages(d, gfn, 0);